package com.xavient.dip.storm.topology;
import java.util.HashMap;
import java.util.Map;
import com.xavient.dip.common.AppArgs;
import com.xavient.dip.common.config.DiPConfiguration;
import com.xavient.dip.common.exceptions.DataIngestException;
import com.xavient.dip.common.utils.CmdLineParser;
import com.xavient.dip.storm.bolt.LocationByTweets;
import com.xavient.dip.storm.bolt.MySQLDataWriterBolt;
import com.xavient.dip.storm.bolt.TopNLocationByTweets;
import com.xavient.dip.storm.bolt.TopNUsersWithMaxFollowers;
import com.xavient.dip.storm.bolt.TwitterRawJsonConvertorBolt;
import com.xavient.dip.storm.bolt.UsersWithMaxFollowers;
import com.xavient.dip.storm.builder.HBaseBoltBuilder;
import com.xavient.dip.storm.builder.HdfsBoltBuilder;
import com.xavient.dip.storm.spout.kafka.KafkaSpoutFactory;
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.generated.StormTopology;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.tuple.Fields;
/**
 * Entry point that assembles the window-based data-ingestion Storm topology and submits it to an
 * in-process {@link LocalCluster}.
 *
 * <p>Wiring, as built by {@code buildTopology}:
 * <pre>
 * kafka-spout -> filter-bolt -+-> hdfs-bolt
 *                             +-> hbase-bolt
 *                             +-> USERS_MAX_FOLLOWERS -> TOPN_USERS_MAX_FOLLOWERS -+
 *                             +-> LOCATION_BY_TWEETS  -> TOPN_LOCATION_BY_TWEETS  -+-> MYSQL_WRITER
 * </pre>
 */
public class DataIngestionWindowBasedTopology {

    public static final String KAFKA_SPOUT_ID = "kafka-spout";
    public static final String FILTER_BOLT_ID = "filter-bolt";
    public static final String HDFS_BOLT_ID = "hdfs-bolt";
    public static final String HBASE_BOLT_ID = "hbase-bolt";

    // Component ids of the ranking / persistence part of the topology.
    private static final String USERS_MAX_FOLLOWERS_BOLT_ID = "USERS_MAX_FOLLOWERS";
    private static final String TOPN_USERS_MAX_FOLLOWERS_BOLT_ID = "TOPN_USERS_MAX_FOLLOWERS";
    private static final String LOCATION_BY_TWEETS_BOLT_ID = "LOCATION_BY_TWEETS";
    private static final String TOPN_LOCATION_BY_TWEETS_BOLT_ID = "TOPN_LOCATION_BY_TWEETS";
    private static final String MYSQL_WRITER_BOLT_ID = "MYSQL_WRITER";

    // Storm-conf key under which the HBase settings are stored; the same key is handed to
    // HBaseBoltBuilder, so both usages must stay in sync.
    private static final String HBASE_CONFIG_KEY = "hbaseConfig";
    private static final String DB_PROPERTIES_KEY = "dbProperties";

    // Tuple field on which the MySQL writer's input streams are grouped.
    private static final String TABLE_NAME_FIELD = "tableName";

    private static final String TOPOLOGY_NAME = "DataIngestion";

    /**
     * Parses the command line, prepares the Storm configuration and submits the topology to a
     * local, in-process cluster.
     *
     * @param args command-line arguments, validated by {@link CmdLineParser#validateArgs}
     * @throws DataIngestException declared for callers; presumably raised by argument
     *         validation (confirm against {@code CmdLineParser})
     */
    public static void main(String[] args) throws DataIngestException {
        CmdLineParser parser = new CmdLineParser();
        AppArgs appArgs = parser.validateArgs(args);

        Map<String, Object> hbaseConfig = copyProperties(appArgs);

        // System.setProperty rejects null values with a NullPointerException, so only set the
        // Hadoop user when one was actually configured.
        String hadoopUser = appArgs.getProperty(DiPConfiguration.HADOOP_USER_NAME);
        if (hadoopUser != null) {
            System.setProperty("HADOOP_USER_NAME", hadoopUser);
        }

        Config stormConf = new Config();
        stormConf.put(HBASE_CONFIG_KEY, hbaseConfig);
        // Tick tuple frequency, in seconds; defaults to 10 when "batchIntervalInSec" is unset.
        stormConf.put(Config.TOPOLOGY_TICK_TUPLE_FREQ_SECS,
                Integer.valueOf(appArgs.getProperty("batchIntervalInSec", "10")));
        // Stored as a String, exactly as read from the properties (default "100").
        stormConf.put("rankMaxThreshold", appArgs.getProperty("rankMaxThreshold", "100"));
        // A separate copy of the same application properties, published under its own key.
        stormConf.put(DB_PROPERTIES_KEY, copyProperties(appArgs));

        // Local (in-process) mode only. For a real cluster, submit through
        // StormSubmitter.submitTopology(...) instead and translate its checked exceptions
        // (AlreadyAliveException, InvalidTopologyException, AuthorizationException) into a
        // DataIngestException.
        new LocalCluster().submitTopology(TOPOLOGY_NAME, stormConf, buildTopology(appArgs));
    }

    /**
     * Copies every string-valued application property into a new mutable map.
     *
     * @param appArgs parsed application arguments holding the properties
     * @return a fresh map of property name to property value
     */
    private static Map<String, Object> copyProperties(AppArgs appArgs) {
        Map<String, Object> copy = new HashMap<>();
        for (final String name : appArgs.getProperties().stringPropertyNames()) {
            copy.put(name, appArgs.getProperties().getProperty(name));
        }
        return copy;
    }

    /**
     * Wires the spout and bolts together.
     *
     * @param appArgs parsed application arguments passed on to the spout and bolt builders
     * @return the assembled topology
     */
    private static StormTopology buildTopology(AppArgs appArgs) {
        TopologyBuilder builder = new TopologyBuilder();

        // Source: Kafka spout with a parallelism hint of 2, followed by the converter bolt.
        builder.setSpout(KAFKA_SPOUT_ID, KafkaSpoutFactory.getKafkaSpout(appArgs), 2);
        builder.setBolt(FILTER_BOLT_ID, new TwitterRawJsonConvertorBolt())
                .shuffleGrouping(KAFKA_SPOUT_ID);

        // Storage sinks fed directly by the converter bolt.
        builder.setBolt(HDFS_BOLT_ID, HdfsBoltBuilder.build(appArgs))
                .shuffleGrouping(FILTER_BOLT_ID);
        builder.setBolt(HBASE_BOLT_ID, HBaseBoltBuilder.build(appArgs, HBASE_CONFIG_KEY))
                .shuffleGrouping(FILTER_BOLT_ID);

        // Ranking chains: each first-stage bolt is followed by a TopN bolt that receives the
        // whole stream in a single task (global grouping).
        builder.setBolt(USERS_MAX_FOLLOWERS_BOLT_ID, new UsersWithMaxFollowers())
                .shuffleGrouping(FILTER_BOLT_ID);
        builder.setBolt(TOPN_USERS_MAX_FOLLOWERS_BOLT_ID, new TopNUsersWithMaxFollowers())
                .globalGrouping(USERS_MAX_FOLLOWERS_BOLT_ID);
        builder.setBolt(LOCATION_BY_TWEETS_BOLT_ID, new LocationByTweets())
                .shuffleGrouping(FILTER_BOLT_ID);
        builder.setBolt(TOPN_LOCATION_BY_TWEETS_BOLT_ID, new TopNLocationByTweets())
                .globalGrouping(LOCATION_BY_TWEETS_BOLT_ID);

        // Both TopN streams go to the MySQL writer, partitioned by the "tableName" field so
        // that tuples for the same table reach the same writer task.
        builder.setBolt(MYSQL_WRITER_BOLT_ID, new MySQLDataWriterBolt(), 2)
                .fieldsGrouping(TOPN_USERS_MAX_FOLLOWERS_BOLT_ID, new Fields(TABLE_NAME_FIELD))
                .fieldsGrouping(TOPN_LOCATION_BY_TWEETS_BOLT_ID, new Fields(TABLE_NAME_FIELD));

        return builder.createTopology();
    }
}